New York - TLC Trip Record Data

The data used here is a subset of the NYC taxi dataset: it contains only the For-Hire Vehicle (“FHV”) trip records.

For more information, please refer to the original TLC Trip Record Data website.


In [ ]:
import pandas as pd

# URL template for one month of FHV trip records; ``year`` and ``month`` are
# filled in via ``str.format`` and the month is zero-padded to two digits.
# NOTE(review): confirm this bucket still serves these CSV files.
URL = "https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_{year}-{month:02}.csv"
# Name of the only CSV column that is loaded; it is parsed as dates and used
# as the key for time-based grouping.
# NOTE(review): presumably the column name is the same for every year of the
# dataset — verify before requesting other years.
COLUMN = "Pickup_date"

def get_monthly_values(year, month, freq):
    """Count pickups per time interval for a single month.

    The month's CSV is read from ``URL`` (formatted with ``year`` and
    ``month``); only the ``COLUMN`` column is loaded and it is parsed as
    dates. Progress and the in-memory size of the loaded frame (bytes
    divided by 2**20) are printed to stdout.

    Parameters
    ----------
    year : int
        Year substituted into the URL template.
    month : int
        Month substituted into the URL template (zero-padded to two
        digits by the template).
    freq : str
        Interval frequency passed to ``pd.Grouper``, e.g. ``"1h"``.

    Returns
    -------
    pandas.Series
        Number of rows in each ``freq``-sized time bin of ``COLUMN``.

    """

    source = URL.format(year=year, month=month)

    print("Loading {}".format(source))
    pickups = pd.read_csv(source, usecols=[COLUMN], parse_dates=[COLUMN])

    # deep=True also accounts for object payloads, not just the pointers.
    total_bytes = pickups.memory_usage(deep=True).sum()
    print("Finished loading. Total size: {:.2f}".format(total_bytes / (2**20)))

    # Bucket the pickup timestamps into fixed-width bins and count the rows.
    return pickups.groupby(pd.Grouper(key=COLUMN, freq=freq)).size()


def fetch_nyc_taxi_pickups(year, file, freq="1h"):
    """Collect a full year of pickup counts and write them to a CSV file.

    Calls ``get_monthly_values`` for months 1 through 12 of ``year``,
    concatenates the monthly results in order and saves them with a
    single value column named ``Pickup_Count``.

    Parameters
    ----------
    year : int
        Year whose twelve monthly files are processed.
    file : str or file-like
        Destination handed to ``DataFrame.to_csv``.
    freq : str, optional
        Interval frequency forwarded to ``get_monthly_values``
        (default ``"1h"``).

    """

    monthly_counts = [
        get_monthly_values(year, month, freq) for month in range(1, 13)
    ]
    yearly_counts = pd.concat(monthly_counts)
    yearly_counts.to_frame(name="Pickup_Count").to_csv(file)